OHI British Columbia | OHI Science | Citation policy
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
echo = TRUE, message = FALSE, warning = FALSE)
library(raster)
library(data.table)
source('~/github/src/R/common.R') ###
### includes library(tidyverse); library(stringr); dir_M points to ohi directory
dir_git <- '~/github/spp_health_dists'
dir_am_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/aquamaps/d2017')
### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_health_dists')
# ### provenance tracking
# library(provRmd); prov_setup()
### support scripts
source('~/github/src/R/rast_tools.R')
### raster plotting and analyzing scriptsCreate a map of the distribution of current species health - all species in AquaMaps assessed by IUCN (using AquaMaps version of IUCN ID numbers)
Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.
cell_risk_file <- file.path(dir_data, 'risk_by_cell_all.csv')
iucn_to_am_lookup <- read_csv(file.path(dir_setup, 'int',
'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}##
Read 43.7% of 15023644 rows
Read 79.9% of 15023644 rows
Read 15023644 rows and 3 (of 3) columns from 0.312 GB file in 00:00:04
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']
spp_cells_risk_sum <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score))
write_csv(spp_cells_risk_sum, cell_risk_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell <- read_csv(cell_risk_file,
col_types = 'iiddd')
risk_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'n_spp')
plot(risk_rast)plot(nspp_rast)risk_clipped <- risk_rast
values(risk_clipped)[values(nspp_rast) < 5] <- NA
plot(risk_clipped)Looks skewed; does it make sense to look at log risk? Looks like risk (with zeros excluded) might be distributed closer to log-normal.
mean_risk_hist <- ggplot(risk_by_cell) +
ggtheme_plot() +
geom_histogram(aes(x = mean_risk))
print(mean_risk_hist)Again we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or trend_score.
cell_trend_file <- file.path(dir_data, 'spp_trend_by_loiczid.csv')
iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_trend <- read_csv(file.path(dir_data, 'trend_by_spp.csv')) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_trend <- spp_trend[spp_cells, on = 'am_sid']
spp_cells_trend_sum <- spp_cells_trend %>%
filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(), mean_trend = mean(trend_score))
write_csv(spp_cells_trend_sum, cell_trend_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_trend column to be double; there are lots of zero
### values so will default to thinking that column is integer.
trend_by_cell <- read_csv(file.path(dir_data, 'spp_trend_by_loiczid.csv'),
col_types = 'iid')
trend_rast <- subs(loiczid_rast, trend_by_cell, by = 'loiczid', which = 'mean_trend')
nspp_tr_rast <- subs(loiczid_rast, trend_by_cell, by = 'loiczid', which = 'n_spp')
plot(trend_rast)plot(nspp_tr_rast)trend_clipped <- trend_rast
values(trend_clipped)[values(nspp_tr_rast) < 5] <- NA
plot(trend_clipped)Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.
cell_risk_by_range_file <- c(file.path(dir_data, 'risk_by_cell_by_range.csv'))
iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_ranges <- read_csv(file.path(dir_data, 'am_spp_range_summary.csv')) %>%
arrange(area_km2) %>%
mutate(cum_area = cumsum(area_km2),
range_gp = case_when(cum_area <= max(cum_area) * .33 ~ 'small',
cum_area <= max(cum_area) * .67 ~ 'medium',
TRUE ~ 'large')) %>%
select(am_sid, iucn_sid, area_km2, range_gp)
# summary(spp_ranges %>% filter(range_gp == 'small') %>% .$area_km2)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 2522 400481 1180425 2192586 3739745 7681908
# summary(spp_ranges %>% filter(range_gp == 'medium') %>% .$area_km2)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 7690359 8286343 9184440 15440048 12563147 103892977
# summary(spp_ranges %>% filter(range_gp == 'large') %>% .$area_km2)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 105087230 136623452 159164037 167382766 196088390 245559322
spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
left_join(spp_ranges) %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']
spp_cells_risk_sum <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid, range_gp) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score))
write_csv(spp_cells_risk_sum, cell_risk_by_range_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell_by_range <- read_csv(file.path(dir_data, 'risk_by_cell_by_range.csv'),
col_types = 'dcdd')
for(range_size in c('small', 'medium', 'large')) {
### range_size <- 'medium'
tmp <- risk_by_cell_by_range %>%
filter(range_gp == range_size) %>%
filter(n_spp >= 5)
risk_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = paste0('Mean risk for spp w/range ', range_size, ' (n >= 5)'))
plot(nspp_rast, main = paste0('Spp count for spp w/range ', range_size, ' (n >= 5)'))
}Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.
cell_risk_by_taxa_file <- c(file.path(dir_data, 'risk_by_cell_by_taxa.csv'))
iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_taxa <- read_csv(file.path(dir_data, 'am_spp_taxa.csv')) %>%
select(am_sid, iucn_sid, taxa) %>%
distinct()
spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
left_join(spp_taxa) %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']
spp_cells_risk_sum <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid, taxa) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score))
write_csv(spp_cells_risk_sum, cell_risk_by_taxa_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell_by_taxa <- read_csv(file.path(dir_data, 'risk_by_cell_by_taxa.csv'),
col_types = 'dcdd')
taxa_list <- risk_by_cell_by_taxa$taxa %>% unique()
for(taxa_gp in taxa_list) {
### taxa_gp <- 'mammal'
tmp <- risk_by_cell_by_taxa %>%
filter(taxa == taxa_gp) %>%
filter(n_spp >= 0)
risk_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = paste0('Mean risk for spp in ', taxa_gp, ' (n >= 0)'))
plot(nspp_rast, main = paste0('Spp count for spp in ', taxa_gp, ' (n >= 0)'))
}# prov_wrapup(commit_outputs = FALSE)